#!/usr/bin/env python
# coding: utf-8

# In[1]:



# Notebook-only dependency installs — requires IPython/Jupyter (`get_ipython`);
# when running as a plain script, install these packages beforehand instead.
get_ipython().system('pip install pymysql')
get_ipython().system('pip install bs4')


# In[ ]:


import pymysql
import requests
import time
import re
from bs4 import BeautifulSoup
class mysql:  # Thin wrapper around a pymysql database connection.
    def __init__(self, url, username, pwd, dbname):
        """Open a connection.

        url: DB host, username/pwd: credentials, dbname: schema name.
        """
        # Keyword arguments: the positional connect() signature was removed
        # in pymysql 1.0, and keywords are unambiguous in any version.
        self.db = pymysql.connect(host=url, user=username,
                                  password=pwd, database=dbname)

    def __del__(self):
        # self.db may not exist if __init__ raised before assigning it.
        try:
            self.db.close()
        except Exception:
            pass

    def do(self, sql):
        """Execute *sql*, commit, and return all fetched rows (None on error).

        sql: the SQL statement string handed to the database.
        """
        try:
            # Context manager guarantees the cursor is closed (original leaked it).
            with self.db.cursor() as cursor:
                cursor.execute(sql)
                self.db.commit()
                time.sleep(0.5)  # crude rate limit between statements
                return cursor.fetchall()
        except Exception as e:
            # Original printed the Exception *class* object, hiding the actual
            # error; print the caught instance instead.
            print(e)

def fun(a, url, yuanBookId):
    """Scrape one novel page from b.faloo.com and insert its metadata.

    a: mysql wrapper (database connection object)
    url: URL of the novel's page
    yuanBookId: the site's numeric book id, stored alongside the row
    """
    # Timeout so a dead/slow server cannot hang the whole crawl forever.
    html = requests.get(url, timeout=30)
    soup = BeautifulSoup(html.text, 'html.parser')  # parse the page

    def get_t(selector):
        """Return the text of the first element matching *selector*, or ''."""
        found = soup.select(selector)
        return found[0].get_text() if found else ''

    # Novel title
    novelName = get_t('#novelName')
    print(novelName)

    # Category
    Type = get_t('.C-One > span:nth-of-type(1) > span:nth-of-type(1) > a:nth-of-type(3)')
    print(Type)

    # Tags: the container's text is newline-separated; drop the empty
    # first/last pieces and join with '|'.
    tags = soup.select('body > div.center > div.C-Two.bodyBorderShadow > div.Two-Right > div.T-R-Top > div.T-R-T-Box2 > div:nth-of-type(6)')
    _tags = ''
    if tags:
        array = tags[0].get_text().split('\n')[1:-1]
        _tags = '|'.join(array)
        print(_tags)

    # First-chapter (start) time
    start_time = get_t('body > div.center > div.C-Two.bodyBorderShadow > div.Two-Right > div.T-R-Top > div.T-R-T-Box2 > div:nth-of-type(5) > span > span')
    print(start_time)

    # Latest-update time
    end_time = get_t('body > div.center > div.C-Two.bodyBorderShadow > div.Two-Left > div.T-L-One > div.T-L-O-Zuo > div.T-L-O-Z-Box1 > span > span')
    print(end_time)

    # Monthly-ticket count
    yuepiao = get_t('body > div.center > div.C-Three.mgTop20.bodyBorderShadow > div:nth-of-type(7) > div.C-Thr-B1-Box3.colorQianlan')
    print(yuepiao)

    # Click (view) count
    dianji = get_t('body > div.center > div.C-Two.bodyBorderShadow > div.Two-Left > div.T-L-One > div.T-L-O-Zuo > div.T-L-O-Z-Box2.fs14 > span:nth-of-type(3) > span')
    print(dianji)

    # Reward (tip) count
    dashang = get_t('body > div.center > div.C-Three.mgTop20.bodyBorderShadow > div:nth-of-type(5) > div.C-Thr-B1-Box3.colorQianlan')
    print(dashang)

    # Flower count
    xianhua = get_t('body > div.center > div.C-Three.mgTop20.bodyBorderShadow > div:nth-of-type(1) > div.C-Thr-B1-Box3.colorFenhong')
    print(xianhua)

    # Author name
    zuozhe = get_t('body > div.center > div.C-Two.bodyBorderShadow > div.Two-Left > div.T-L-One > div.T-L-O-Zuo > div.T-L-O-Z-Box1 > a')
    print(zuozhe)

    # Word count: take the first run of digits from the text, default 0.
    _zishu = get_t('body > div.center > div.C-Two.bodyBorderShadow > div.Two-Right > div.T-R-Middle > div:nth-child(2)')
    array = re.findall(r'\d+', _zishu)  # raw string: '\d' is a regex escape
    zishu = int(array[0]) if array else 0
    print(zishu)

    # SECURITY: scraped values are interpolated straight into the SQL string.
    # A quote in any field breaks the statement (or worse). Switch mysql.do
    # to parameterized execution (cursor.execute(sql, params)) when possible.
    a.do('INSERT INTO novel(novel.`name`,website,type,tag,startTime,endTime,yuepiao,dianji,dashang,xianhua,yuanBookId,zuozhe,zishu) VALUES("{}","飞卢","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}","{}")'.format(novelName,Type,_tags,start_time,end_time,yuepiao,dianji,dashang,xianhua,yuanBookId,zuozhe,zishu))

if __name__ == '__main__':
    # Book pages follow a predictable numeric URL scheme, so we can walk
    # a contiguous id range and scrape each novel's metadata.
    conn = mysql("47.94.92.138", "cq", "123456", "novel")
    first_id = 650001
    for book_id in range(first_id, first_id + 10):
        page_url = 'https://b.faloo.com/f/{}.html'.format(book_id)
        fun(conn, page_url, book_id)


# In[23]:


import requests
import time
import re
import pymysql
import pandas as pd
from bs4 import BeautifulSoup
class mysql:  # Thin wrapper around a pymysql database connection.
    def __init__(self, url, username, pwd, dbname):
        """Open a connection.

        url: DB host, username/pwd: credentials, dbname: schema name.
        """
        # Keyword arguments: the positional connect() signature was removed
        # in pymysql 1.0, and keywords are unambiguous in any version.
        self.db = pymysql.connect(host=url, user=username,
                                  password=pwd, database=dbname)

    def __del__(self):
        # self.db may not exist if __init__ raised before assigning it.
        try:
            self.db.close()
        except Exception:
            pass

    def do(self, sql):
        """Execute *sql*, commit, and return all fetched rows (None on error)."""
        try:
            # Context manager guarantees the cursor is closed (original leaked it).
            with self.db.cursor() as cursor:
                cursor.execute(sql)
                self.db.commit()
                time.sleep(0.5)  # crude rate limit between statements
                return cursor.fetchall()
        except Exception as e:
            # Original printed the Exception *class* object, hiding the actual
            # error; print the caught instance instead.
            print(e)

def fun(a, yuanBookId):
    """Fetch the page for *yuanBookId* and backfill its author name in the DB.

    a: mysql wrapper (database connection object)
    yuanBookId: the site's numeric book id
    """
    url = 'https://b.faloo.com/f/{}.html'.format(yuanBookId)
    # Timeout so a dead/slow server cannot hang the backfill loop forever.
    html = requests.get(url, timeout=30)
    soup = BeautifulSoup(html.text, 'html.parser')

    def get_t(string):
        # NOTE(review): the *string* argument is ignored — the author link is
        # always located via this hard-coded class selector. The CSS path the
        # caller passes is dead text. Confirm intent before "fixing" this to
        # use *string*, since only the hard-coded selector is known to work.
        s = soup.select('a[class="fs14 colorQianHui"]')
        if s:
            return s[0].text
        return ''

    # Author name (selector argument is unused — see get_t).
    zuozhe = get_t('body>div.center>div.C-Two.bodyBorderShadow>div.Two-Left>div.T-L-One>div.T-L-O-Zuo>div.T-L-O-Z-Box1>a.fs14.colorQianHui')
    print(zuozhe)

    # SECURITY: zuozhe comes from a scraped page and is interpolated straight
    # into SQL — a quote in the name breaks the statement. Switch to a
    # parameterized query when mysql.do supports passing params.
    a.do('UPDATE novel SET zuozhe="{}" WHERE yuanBookId={}'.format(zuozhe, yuanBookId))


# Backfill author names for previously-scraped 飞卢 novels whose zuozhe is empty.
a = mysql("47.94.92.138", "cq", "123456", "novel")  # NOTE(review): hard-coded credentials

bookIds = pd.read_sql('SELECT yuanBookId FROM novel WHERE zuozhe="" AND website="飞卢" ', a.db)
for raw_id in bookIds.yuanBookId:
    # Normalize the id to a plain int: depending on the column type, pandas may
    # return int64, float, or a numeric string. The original str(round(id, 0))
    # produced "650001.0" for floats, yielding broken .../f/650001.0.html URLs.
    book_id = int(float(raw_id))
    print(book_id)
    fun(a, book_id)


